import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd
import pickle
from decision_company import read_csv_file, convert_to_datetime

# Read 'atp_tennis.csv' from the directory passed as the first CLI argument
data_dir = sys.argv[1]
csv_path = os.path.join(data_dir, 'atp_tennis.csv')
atp_tennis = read_csv_file(csv_path)

# Overwrite the raw 'Date' column with the output of convert_to_datetime
converted_dates = convert_to_datetime(atp_tennis['Date'])
atp_tennis['Date'] = converted_dates

print(atp_tennis['Date'])
# pickle.dump(atp_tennis['Date'],open("./ref_result/atp_tennis_Date.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, get_max

# atp_tennis is reused from earlier in the script; nothing is reloaded here.
# The latest match date is whatever get_max reports for the 'Date' column.
match_dates = atp_tennis['Date']
last_date = get_max(match_dates)

print(last_date)
# pickle.dump(last_date,open("./ref_result/last_date.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, create_date_offset, filter_by_date

# Cutoff date: five years before the last date found in the dataset
five_year_span = create_date_offset(years=5)
five_years_ago = last_date - five_year_span

# Keep only the matches selected by filter_by_date for that cutoff
# (presumably rows dated on/after the cutoff - confirm against the helper)
recent_matches = filter_by_date(atp_tennis, 'Date', five_years_ago)

print(recent_matches)
# pickle.dump(recent_matches,open("./ref_result/recent_matches.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, get_min_value, sort_by_values, get_first_n_rows, concatenate_objects, rename_columns, bind_dataframe

# Build one (player, rank) table from both sides of each recent match:
# the Player_2/Rank_2 columns are renamed so they line up with Player_1/Rank_1.
first_side = recent_matches[['Player_1', 'Rank_1']]
second_side = rename_columns(
    recent_matches[['Player_2', 'Rank_2']],
    {'Player_2': 'Player_1', 'Rank_2': 'Rank_1'},
)
concatenated_data = concatenate_objects(first_side, second_side)

# Group by player, take the minimum per group, order by 'Rank_1'
# and keep the first ten rows.
grouped_data = bind_dataframe(concatenated_data, 'Player_1')
min_values = get_min_value(grouped_data)
sorted_data = sort_by_values(min_values, 'Rank_1')
top_10_ranked_players = get_first_n_rows(sorted_data, 10)

print(top_10_ranked_players)
# pickle.dump(top_10_ranked_players,open("./ref_result/top_10_ranked_players.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, to_list, reset_index

# Reset the index with drop=False so the index is kept rather than discarded,
# then convert the result to a plain Python list.
top_10_with_index = reset_index(top_10_ranked_players, drop=False)
top_10_ranked_players_list = to_list(top_10_with_index)

print(top_10_ranked_players_list)
# pickle.dump(top_10_ranked_players_list,open("./ref_result/top_10_ranked_players_list.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file


# Take the first field of every entry (the player name) and trim
# leading/trailing whitespace from it.
top_10_player_names = [
    entry[0].strip()
    for entry in top_10_ranked_players_list
]

print(top_10_player_names)
# pickle.dump(top_10_player_names,open("./ref_result/top_10_player_names.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, check_elements_in_list, assert_series, logical_and, logical_or


# Condition 1: the match surface equals 'Grass'
matches_on_grass = assert_series(recent_matches['Surface'], 'Grass', 'equality')

# Condition 2: at least one of the two players is in the top-10 name list
player_1_in_top_10 = check_elements_in_list(recent_matches['Player_1'], top_10_player_names)
player_2_in_top_10 = check_elements_in_list(recent_matches['Player_2'], top_10_player_names)
matches_with_top_10_players = logical_or(player_1_in_top_10, player_2_in_top_10)

# Recent matches that satisfy both conditions
grass_and_top_10 = logical_and(matches_on_grass, matches_with_top_10_players)
grass_matches = recent_matches[grass_and_top_10]

print(grass_matches)
# pickle.dump(grass_matches,open("./ref_result/grass_matches.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, count_unique_values, fill_missing_values


# Calculate the number of grass matches played by each top-10 player.
# NOTE(review): assumes count_unique_values returns a pandas Series indexed by
# player name (the previous code relied on the same thing via `+` and list
# indexing) - confirm against decision_company.
appearances_as_player_1 = count_unique_values(grass_matches['Player_1'])
appearances_as_player_2 = count_unique_values(grass_matches['Player_2'])

# A plain `+` aligns on the index and yields NaN for any player present on only
# one side; that NaN was later replaced by 0, discarding real matches.
# fill_value=0 treats the missing side as zero appearances instead.
matches_played = appearances_as_player_1.add(appearances_as_player_2, fill_value=0)

# reindex (unlike `[...]` with a list of labels) does not raise KeyError for a
# top-10 player without any grass match; such players come back as missing and
# are then set to 0.
matches_played = fill_missing_values(matches_played.reindex(top_10_player_names), 0)

print(matches_played)
# pickle.dump(matches_played,open("./ref_result/matches_played.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, count_unique_values, fill_missing_values


# Calculate the number of grass matches won by each top-10 player.
# NOTE(review): assumes count_unique_values returns a pandas Series indexed by
# player name - confirm against decision_company.
matches_won = count_unique_values(grass_matches['Winner'])

# Selecting with `[top_10_player_names]` raises KeyError when a top-10 player
# never appears as a winner. reindex returns a missing value for that player
# instead, which fill_missing_values then turns into 0 wins.
matches_won = fill_missing_values(matches_won.reindex(top_10_player_names), 0)

print(matches_won)
# pickle.dump(matches_won,open("./ref_result/matches_won.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file


# Wins divided by matches played, per player, scaled to a percentage
win_fraction = matches_won / matches_played
win_rate_percentage = win_fraction * 100

print(win_rate_percentage)
# pickle.dump(win_rate_percentage,open("./ref_result/win_rate_percentage.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, check_elements_in_list


# Keep only the grass matches in which BOTH players are in the top-10 name list
first_player_is_top_10 = check_elements_in_list(grass_matches['Player_1'], top_10_player_names)
second_player_is_top_10 = check_elements_in_list(grass_matches['Player_2'], top_10_player_names)
head_to_head_matches = grass_matches[first_player_is_top_10 & second_player_is_top_10]

print(head_to_head_matches)
# pickle.dump(head_to_head_matches,open("./ref_result/head_to_head_matches.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, create_zeros_array, create_dataframe


# Square, zero-filled table labelled by player name on both axes.
# The side length is taken from top_10_player_names (defined earlier) rather
# than a literal 10, so the array shape always matches the number of labels
# even if fewer than ten names were found.
player_count = len(top_10_player_names)
head_to_head_array = create_zeros_array((player_count, player_count))
head_to_head_matrix = create_dataframe(head_to_head_array, columns=top_10_player_names, index=top_10_player_names)

print(head_to_head_matrix)
# pickle.dump(head_to_head_matrix,open("./ref_result/head_to_head_matrix.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, iterate_rows, access_dataframe_loc, update_dataframe_loc


# For every head-to-head match, add one to the cell at [winner, loser].
for _, match in iterate_rows(head_to_head_matches):
    winner = match['Winner']
    # The loser is whichever of the two listed players is not the winner
    if match['Player_1'] == winner:
        loser = match['Player_2']
    else:
        loser = match['Player_1']
    previous_wins = access_dataframe_loc(head_to_head_matrix, winner, loser)
    update_dataframe_loc(head_to_head_matrix, winner, loser, previous_wins + 1)

print(head_to_head_matrix)
# pickle.dump(head_to_head_matrix,open("./ref_result/head_to_head_matrix.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file


# Round labels
rounds = [
    'Early Round',
    'Quarterfinals',
    'Semifinals',
    'Final',
]

print(rounds)
# pickle.dump(rounds,open("./ref_result/rounds.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file


# Starts out as an empty list; nothing is added to it in this part of the script
performance_data_list = list()

print(performance_data_list)
# pickle.dump(performance_data_list,open("./ref_result/performance_data_list.pkl","wb"))

